/*
 *   Byte-oriented AES-256 implementation.
 *   All lookup tables replaced with 'on the fly' calculations.
 */
#include "aes.h"

/* -------------------------------------------------------------------------- */
uint8_t rj_xtime(uint8_t x) {
    return (x & 0x80) ? ((x << 1) ^ 0x1b) : (x << 1);
} /* rj_xtime */

/* -------------------------------------------------------------------------- */
void aes_subBytes(uint8_t *buf) {
    register uint8_t i = 16;

sub:
    while (i--)
        buf[i] = rj_sbox(buf[i]);
} /* aes_subBytes */

/* -------------------------------------------------------------------------- */
void aes_addRoundKey(uint8_t *buf, uint8_t *key) {
    register uint8_t i = 16;

addkey:
    while (i--)
        buf[i] ^= key[i];
} /* aes_addRoundKey */

/* -------------------------------------------------------------------------- */
void aes_addRoundKey_cpy(uint8_t *buf, uint8_t *key, uint8_t *cpk) {
    register uint8_t i = 16;

cpkey:
    while (i--)
        buf[i] ^= (cpk[i] = key[i]), cpk[16 + i] = key[16 + i];
} /* aes_addRoundKey_cpy */

/* -------------------------------------------------------------------------- */
void aes_shiftRows(uint8_t *buf) {
    register uint8_t i, j; /* to make it potentially parallelable :) */

    i = buf[1];
    buf[1] = buf[5];
    buf[5] = buf[9];
    buf[9] = buf[13];
    buf[13] = i;
    i = buf[10];
    buf[10] = buf[2];
    buf[2] = i;
    j = buf[3];
    buf[3] = buf[15];
    buf[15] = buf[11];
    buf[11] = buf[7];
    buf[7] = j;
    j = buf[14];
    buf[14] = buf[6];
    buf[6] = j;

} /* aes_shiftRows */

/* -------------------------------------------------------------------------- */
void aes_mixColumns(uint8_t *buf) {
    register uint8_t i, a, b, c, d, e;

mix:
    for (i = 0; i < 16; i += 4) {
        a = buf[i];
        b = buf[i + 1];
        c = buf[i + 2];
        d = buf[i + 3];
        e = a ^ b ^ c ^ d;
        buf[i] ^= e ^ rj_xtime(a ^ b);
        buf[i + 1] ^= e ^ rj_xtime(b ^ c);
        buf[i + 2] ^= e ^ rj_xtime(c ^ d);
        buf[i + 3] ^= e ^ rj_xtime(d ^ a);
    }
} /* aes_mixColumns */

/* -------------------------------------------------------------------------- */
void aes_expandEncKey(uint8_t *k, uint8_t *rc) {
    register uint8_t i;

    k[0] ^= rj_sbox(k[29]) ^ (*rc);
    k[1] ^= rj_sbox(k[30]);
    k[2] ^= rj_sbox(k[31]);
    k[3] ^= rj_sbox(k[28]);
    *rc = F(*rc);

exp1:
    for (i = 4; i < 16; i += 4)
        k[i] ^= k[i - 4], k[i + 1] ^= k[i - 3], k[i + 2] ^= k[i - 2],
            k[i + 3] ^= k[i - 1];
    k[16] ^= rj_sbox(k[12]);
    k[17] ^= rj_sbox(k[13]);
    k[18] ^= rj_sbox(k[14]);
    k[19] ^= rj_sbox(k[15]);

exp2:
    for (i = 20; i < 32; i += 4)
        k[i] ^= k[i - 4], k[i + 1] ^= k[i - 3], k[i + 2] ^= k[i - 2],
            k[i + 3] ^= k[i - 1];

} /* aes_expandEncKey */

/* -------------------------------------------------------------------------- */
void aes256_encrypt_ecb(aes256_context *ctx, uint8_t k[32], uint8_t buf[16]) {
    // INIT
    uint8_t rcon = 1;
    uint8_t i;

ecb1:
    for (i = 0; i < sizeof(ctx->key); i++) {
        ctx->enckey[i] = ctx->deckey[i] = k[i];
    }
ecb2:
    for (i = 8; --i;) {
        aes_expandEncKey(ctx->deckey, &rcon);
    }

    // DEC
    aes_addRoundKey_cpy(buf, ctx->enckey, ctx->key);
ecb3:
    for (i = 1, rcon = 1; i < 14; ++i) {
        aes_subBytes(buf);
        aes_shiftRows(buf);
        aes_mixColumns(buf);
        if (i & 1)
            aes_addRoundKey(buf, &ctx->key[16]);
        else
            aes_expandEncKey(ctx->key, &rcon), aes_addRoundKey(buf, ctx->key);
    }
    aes_subBytes(buf);
    aes_shiftRows(buf);
    aes_expandEncKey(ctx->key, &rcon);
    aes_addRoundKey(buf, ctx->key);
} /* aes256_encrypt */

